This notebook demonstrates the Australian Water Observations from Space (WOfS) algorithm, applied to Sentinel-2 data instead of Landsat. This water-detection algorithm is an improvement over the Landsat QA water flag and the NDWI index for identifying water. For more information, see Geoscience Australia's Water Observations from Space product documentation.
# --- Environment setup -------------------------------------------------------
# Open Data Cube connection used for all queries in this notebook.
import datacube
dc = datacube.Datacube(app='Water_Observations_from_Space')
from datacube.utils import masking
import sys, os
# Opt out of the PyGEOS backend before geospatial libraries are imported.
os.environ['USE_PYGEOS'] = '0'
import datetime
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
# Digital Earth Australia helper tools (plotting, band indices).
from dea_tools.plotting import rgb, display_map
from dea_tools.bandindices import calculate_indices
### EASI tools
# Make the repository's ../scripts folder importable for the helpers below.
sys.path.append(os.path.expanduser('../scripts'))
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiDefaults
from easi_tools import notebook_utils
easi = EasiDefaults() # Get the default parameters for this system
Successfully found configuration for deployment "eail"
# Start a local Dask cluster (no Gateway) for lazy, parallel data loading.
cluster, client = notebook_utils.initialize_dask(use_gateway=False)
display(cluster if cluster else client)  # show the cluster widget, or the client if no cluster object
print(notebook_utils.localcluster_dashboard(client, server=easi.hub))  # hub-proxied dashboard URL
a95524e7
| Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
| Total threads: 8 | Total memory: 29.00 GiB |
| Status: running | Using processes: True |
Scheduler-33979698-f0ff-45ef-be16-ee256470b9ed
| Comm: tcp://127.0.0.1:39881 | Workers: 4 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 8 |
| Started: Just now | Total memory: 29.00 GiB |
| Comm: tcp://127.0.0.1:39245 | Total threads: 2 |
| Dashboard: http://127.0.0.1:32811/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:46367 | |
| Local directory: /tmp/dask-worker-space/worker-if7j1mo_ | |
| Comm: tcp://127.0.0.1:36721 | Total threads: 2 |
| Dashboard: http://127.0.0.1:33527/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:34615 | |
| Local directory: /tmp/dask-worker-space/worker-ui0nvnw1 | |
| Comm: tcp://127.0.0.1:46425 | Total threads: 2 |
| Dashboard: http://127.0.0.1:34175/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:37031 | |
| Local directory: /tmp/dask-worker-space/worker-247ycyhp | |
| Comm: tcp://127.0.0.1:36145 | Total threads: 2 |
| Dashboard: http://127.0.0.1:38859/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:33307 | |
| Local directory: /tmp/dask-worker-space/worker-as7dc2j7 | |
https://hub.eail.easi-eo.solutions/user/jhodge/proxy/8787/status
# Configure signed, requester-pays S3 access and propagate the credentials
# to the Dask workers via `client`.
from datacube.utils.aws import configure_s3_access
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client)
<botocore.credentials.Credentials at 0x7fed5b5c00d0>
# Define the Product
product = "s2_l2a"  # Sentinel-2 Level-2A surface reflectance
# Select an analysis region (Latitude-Longitude)
# Select a time period within the extents of the dataset (Year-Month-Day)
# Example alternative region -- Mombasa, Kenya:
# latitude = (-4.05, -3.95)
# longitude = (39.60, 39.68)
# Or fall back to the deployment defaults:
# latitude=easi.latitude
# longitude=easi.longitude
# Analysis region (presumably the Lake Mead area, NV/AZ -- inferred from coordinates)
latitude = (36.3, 36.5)
longitude = (-114.325, -114.43)
# Define Time Range: one full calendar year of Sentinel-2 acquisitions
time_extents = ('2021-01-01', '2021-12-31')
# The code below renders a map that can be used to view the analysis region.
display_map(longitude,latitude)
/env/lib/python3.10/site-packages/dea_tools/plotting.py:313: FutureWarning: This function is deprecated. See: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1
all_longitude, all_latitude = transform(Proj(crs), Proj('EPSG:4326'), all_x,
# Bands to load; 'SCL' is the Sentinel-2 Scene Classification Layer used for cloud masking.
measurements = ['red', 'green', 'blue', 'nir', 'swir_1', 'swir_2', 'SCL']
# Spectral bands only (SCL removed).
# NOTE(review): data_names is not referenced anywhere below -- confirm it is still needed.
data_names = measurements.copy()
data_names.remove('SCL')
# Lazily load the time series (one Dask chunk per time slice).
s2_dataset = dc.load(latitude = latitude,
                     longitude = longitude,
                     time = time_extents,
                     product = product,
                     output_crs = 'epsg:6933',   # equal-area projection
                     resolution = (-10,10),      # 10 m pixels; negative y = north-up
                     measurements = measurements,
                     group_by = 'solar_day',     # merge same-day overlapping scenes
                     dask_chunks = {'time':1})
s2_dataset
<xarray.Dataset>
Dimensions: (time: 73, y: 2064, x: 1014)
Coordinates:
* time (time) datetime64[ns] 2021-01-05T18:34:07 ... 2021-12-31T18:...
* y (y) float64 4.355e+06 4.355e+06 ... 4.334e+06 4.334e+06
* x (x) float64 -1.104e+07 -1.104e+07 ... -1.103e+07 -1.103e+07
spatial_ref int32 6933
Data variables:
red (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
green (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
blue (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
nir (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
swir_1 (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
swir_2 (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
SCL (time, y, x) uint8 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
Attributes:
crs: epsg:6933
# Build a clear-pixel mask from the Sentinel-2 Scene Classification Layer (SCL).
# True = usable pixel. Excluded SCL classes: 0 no-data, 1 saturated/defective,
# 3 cloud shadow, 8 cloud medium probability, 9 cloud high probability,
# 10 thin cirrus. (~isin is equivalent to chaining the six != comparisons.)
cloud_mask = ~s2_dataset.SCL.isin([0, 1, 3, 8, 9, 10])
# Rename the SWIR bands to the names the WOFS classifier expects.
s2_dataset = s2_dataset.rename_vars({'swir_1':'swir1','swir_2':'swir2'})
s2_dataset
<xarray.Dataset>
Dimensions: (time: 73, y: 2064, x: 1014)
Coordinates:
* time (time) datetime64[ns] 2021-01-05T18:34:07 ... 2021-12-31T18:...
* y (y) float64 4.355e+06 4.355e+06 ... 4.334e+06 4.334e+06
* x (x) float64 -1.104e+07 -1.104e+07 ... -1.103e+07 -1.103e+07
spatial_ref int32 6933
Data variables:
red (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
green (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
blue (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
nir (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
swir1 (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
swir2 (time, y, x) uint16 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
SCL (time, y, x) uint8 dask.array<chunksize=(1, 2064, 1014), meta=np.ndarray>
Attributes:
crs: epsg:6933
    grid_mapping: spatial_ref
s2_dataset = s2_dataset.compute()
/env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject(
# WOFS was written for Landsat Collection 1, so Collection 2 data must be scaled
# to look like Collection 1. Adapted from:
# https://github.com/GeoscienceAustralia/wofs/blob/master/wofs/virtualproduct.py
# For Sentinel-2 no actual rescaling happens (factor 1, offset 0); the step only
# normalises dtype and nodata so the data is in the format the classifier expects.
def scale_usgs_collection2(data):
    """Shape each band for the WOFS classifier (identity scale for Sentinel-2).

    Every variable is passed through ``scale_and_clip_dataarray`` with
    scale_factor=1 and add_offset=0, which casts to int16 and flags values
    outside the valid reflectance range (0-10000) as nodata (-999).

    Parameters
    ----------
    data : xr.Dataset
        Reflectance bands; each variable must carry a 'nodata' attribute.

    Returns
    -------
    xr.Dataset
        Same variables with int16 values and nodata == -999; dataset-level
        attributes are preserved.
    """
    attrs = data.attrs
    # Dataset.apply was deprecated in xarray 0.14; Dataset.map is the
    # drop-in replacement with identical semantics.
    data = data.map(scale_and_clip_dataarray, keep_attrs=False,
                    scale_factor=1, add_offset=0,  # Don't actually scale or offset the data
                    clip_range=None, valid_range=(0, 10000))
    data.attrs = attrs  # keep_attrs=False drops dataset attrs; restore them
    return data
def scale_and_clip_dataarray(dataarray: xr.DataArray, *, scale_factor=1, add_offset=0, clip_range=None,
                             valid_range=None, new_nodata=-999, new_dtype='int16'):
    """Scale/offset a band, cast it, and re-flag nodata.

    Steps, in order: remember where the original nodata pixels are, apply
    ``x * scale_factor + add_offset``, optionally clip to ``clip_range``,
    cast to ``new_dtype``, write ``new_nodata`` into the remembered nodata
    pixels, then replace anything outside ``valid_range`` with ``new_nodata``.
    The original attrs are restored with 'nodata' updated to ``new_nodata``.

    NOTE(review): the ``.where`` calls may upcast the array beyond
    ``new_dtype`` (xarray/NumPy type promotion) -- confirm the returned dtype
    downstream if it matters.
    """
    orig_attrs = dataarray.attrs
    # Input nodata sentinel must be present on the band's attrs (KeyError otherwise).
    nodata = dataarray.attrs['nodata']
    mask = dataarray.data == nodata  # remember nodata pixels before scaling moves them
    # add another mask here for if data > 10000 then also make that nodata
    dataarray = dataarray * scale_factor + add_offset
    if clip_range is not None:
        dataarray = dataarray.clip(*clip_range)
    dataarray = dataarray.astype(new_dtype)
    dataarray.data[mask] = new_nodata  # re-flag original nodata in the new encoding
    if valid_range is not None:
        valid_min, valid_max = valid_range
        # Out-of-range values (e.g. reflectance > 10000) are also treated as nodata.
        dataarray = dataarray.where(dataarray >= valid_min, new_nodata)
        dataarray = dataarray.where(dataarray <= valid_max, new_nodata)
    dataarray.attrs = orig_attrs
    dataarray.attrs['nodata'] = new_nodata
    return dataarray
# Shape the Sentinel-2 data like USGS Collection 1 (identity scale/offset here).
s2_dataset_scaled = scale_usgs_collection2(s2_dataset) # Use the same function for converting landsat data from collection 2 to collection 1 values, but use scale and offset of 1 and 0.
from ceos_utils.data_cube_utilities.dc_water_classifier import wofs_classify
# Classify each time slice; cloud_mask marks the pixels the classifier may use.
# NOTE(review): no_data=0 here collides with the classifier's "not water" value
# of 0 -- confirm this is intended (the masking step below tests for -9999).
ts_water_classification = wofs_classify(s2_dataset_scaled,clean_mask = cloud_mask.values, no_data=0, x_coord='x', y_coord='y')
/home/jovyan/cal-notebooks/examples/../scripts/ceos_utils/data_cube_utilities/dc_water_classifier.py:136: RuntimeWarning: divide by zero encountered in divide return (a - b) / (a + b)
# Apply nan to no_data values
# NOTE(review): this masks the -9999 sentinel, but wofs_classify above was
# called with no_data=0 -- verify which sentinel the classifier actually emits.
ts_water_classification = ts_water_classification.where(ts_water_classification != -9999).astype(np.float16)
# Time series aggregation that ignores nan values.
# Per-pixel percentage of clear observations classified as water (0-100).
water_classification_percentages = (ts_water_classification.mean(dim = ['time']) * 100).wofs.rename('water_classification_percentages')
# Import the colour scheme and render no-data (NaN) pixels as black.
# Work on a *copy*: mutating a registered colormap (the old
# `from matplotlib.cm import jet_r; jet_r.set_bad(...)` pattern) is
# deprecated and raises an error in Matplotlib >= 3.7.
jet_r = plt.get_cmap('jet_r').copy()
jet_r.set_bad('black', 1.0)
# Preserve the region's aspect ratio: height/width of the percentage grid.
img_scale = water_classification_percentages.shape[0]/water_classification_percentages.shape[1]
# This is where the WOFS time series product is generated.
# Areas of RED have experienced little or no water over the time series
# Areas of BLUE have experienced significant or constant water over the time series
figsize = 6
water_classification_percentages.plot(cmap=jet_r, figsize=(figsize, figsize * img_scale))
plt.title("Percent of Samples Classified as Water")
plt.axis('off')
plt.show()